library(tidyverse)
library(lubridate)
library(RColorBrewer)
# Download the 11 March 2020 daily report from the CSSEGISandData COVID-19
# repository and give the two slash-separated columns syntactic names.
report_03_11_2020 <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-11-2020.csv" %>%
  url() %>%
  read_csv() %>%
  rename(
    Province.State = "Province/State",
    Country.Region = "Country/Region"
  )
## Parsed with column specification:
## cols(
##   `Province/State` = col_character(),
##   `Country/Region` = col_character(),
##   `Last Update` = col_datetime(format = ""),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Latitude = col_double(),
##   Longitude = col_double()
## )
# Preview the first rows of the 11 March report.
report_03_11_2020 %>% head()
## # A tibble: 6 x 8
##   Province.State Country.Region `Last Update`       Confirmed Deaths Recovered
##   <chr>          <chr>          <dttm>                  <dbl>  <dbl>     <dbl>
## 1 Hubei          China          2020-03-11 10:53:02     67773   3046     49134
## 2 <NA>           Italy          2020-03-11 21:33:02     12462    827      1045
## 3 <NA>           Iran           2020-03-11 18:52:03      9000    354      2959
## 4 <NA>           Korea, South   2020-03-11 21:13:18      7755     60       288
## 5 France         France         2020-03-11 22:53:03      2281     48        12
## 6 <NA>           Spain          2020-03-11 20:53:02      2277     54       183
## # … with 2 more variables: Latitude <dbl>, Longitude <dbl>
# Show the column types of the 11 March report (and the attached parsing spec).
report_03_11_2020 %>% str()
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 216 obs. of  8 variables:
##  $ Province.State: chr  "Hubei" NA NA NA ...
##  $ Country.Region: chr  "China" "Italy" "Iran" "Korea, South" ...
##  $ Last Update   : POSIXct, format: "2020-03-11 10:53:02" "2020-03-11 21:33:02" ...
##  $ Confirmed     : num  67773 12462 9000 7755 2281 ...
##  $ Deaths        : num  3046 827 354 60 48 ...
##  $ Recovered     : num  49134 1045 2959 288 12 ...
##  $ Latitude      : num  31 43 32 36 46.2 ...
##  $ Longitude     : num  112.27 12 53 128 2.21 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Province/State` = col_character(),
##   ..   `Country/Region` = col_character(),
##   ..   `Last Update` = col_datetime(format = ""),
##   ..   Confirmed = col_double(),
##   ..   Deaths = col_double(),
##   ..   Recovered = col_double(),
##   ..   Latitude = col_double(),
##   ..   Longitude = col_double()
##   .. )
# Download the 24 March 2020 daily report. This file uses underscore-separated
# column names; rename them to match the 11 March table and drop the FIPS and
# Admin2 columns.
report_03_24_2020 <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-24-2020.csv" %>%
  url() %>%
  read_csv() %>%
  rename(
    Province.State = "Province_State",
    Country.Region = "Country_Region"
  ) %>%
  select(-c(FIPS, Admin2))
## Parsed with column specification:
## cols(
##   FIPS = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
# Preview the first rows of the 24 March report.
report_03_24_2020 %>% head()
## # A tibble: 6 x 10
##   Province.State Country.Region Last_Update           Lat  Long_ Confirmed
##   <chr>          <chr>          <dttm>              <dbl>  <dbl>     <dbl>
## 1 South Carolina US             2020-03-24 23:37:31  34.2  -82.5         1
## 2 Louisiana      US             2020-03-24 23:37:31  30.3  -92.4         2
## 3 Virginia       US             2020-03-24 23:37:31  37.8  -75.6         1
## 4 Idaho          US             2020-03-24 23:37:31  43.5 -116.         19
## 5 Iowa           US             2020-03-24 23:37:31  41.3  -94.5         1
## 6 Kentucky       US             2020-03-24 23:37:31  37.1  -85.3         0
## # … with 4 more variables: Deaths <dbl>, Recovered <dbl>, Active <dbl>,
## #   Combined_Key <chr>
# Show the column types of the 24 March report (and the attached parsing spec).
report_03_24_2020 %>% str()
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 3417 obs. of  10 variables:
##  $ Province.State: chr  "South Carolina" "Louisiana" "Virginia" "Idaho" ...
##  $ Country.Region: chr  "US" "US" "US" "US" ...
##  $ Last_Update   : POSIXct, format: "2020-03-24 23:37:31" "2020-03-24 23:37:31" ...
##  $ Lat           : num  34.2 30.3 37.8 43.5 41.3 ...
##  $ Long_         : num  -82.5 -92.4 -75.6 -116.2 -94.5 ...
##  $ Confirmed     : num  1 2 1 19 1 0 1 0 25 0 ...
##  $ Deaths        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Recovered     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Active        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined_Key  : chr  "Abbeville, South Carolina, US" "Acadia, Louisiana, US" "Accomack, Virginia, US" "Ada, Idaho, US" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   FIPS = col_character(),
##   ..   Admin2 = col_character(),
##   ..   Province_State = col_character(),
##   ..   Country_Region = col_character(),
##   ..   Last_Update = col_datetime(format = ""),
##   ..   Lat = col_double(),
##   ..   Long_ = col_double(),
##   ..   Confirmed = col_double(),
##   ..   Deaths = col_double(),
##   ..   Recovered = col_double(),
##   ..   Active = col_double(),
##   ..   Combined_Key = col_character()
##   .. )

Cases in US states as of March 11th

# Dot plot of confirmed cases per US state/province in the 11 March report,
# with the states ordered by their confirmed count.
report_03_11_2020 %>%
  filter(Country.Region == "US") %>%
  ggplot(aes(x = Confirmed, y = reorder(Province.State, Confirmed))) +
  geom_point() +
  ggtitle("Confirmed cases for each US State") +
  # The y axis shows Province.State, so it is labelled as such
  # (it previously read "Country/Region").
  ylab("Province/State") +
  xlab("Confirmed Cases")

# ggsave() with no `plot` argument writes the last plot.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_US_march_11.png",
  width = 10,
  height = 10,
  dpi = "screen"
)
# Total deaths per country/region in the 11 March report, largest first.
report_03_11_2020 %>%
  group_by(Country.Region) %>%
  summarise(Deaths = sum(Deaths)) %>%
  arrange(-Deaths)
## # A tibble: 116 x 2
##    Country.Region Deaths
##    <chr>           <dbl>
##  1 China            3161
##  2 Italy             827
##  3 Iran              354
##  4 Korea, South       60
##  5 Spain              54
##  6 France             48
##  7 US                 36
##  8 Japan              15
##  9 United Kingdom      8
## 10 Cruise Ship         7
## # … with 106 more rows
# Horizontal bar chart of the 20 countries/regions with the highest death
# totals in the 11 March report.
report_03_11_2020 %>%
  group_by(Country.Region) %>%
  summarise(Deaths = sum(Deaths)) %>%
  arrange(-Deaths) %>%
  slice(seq_len(20)) %>%
  ggplot(aes(x = Deaths, y = reorder(Country.Region, Deaths))) +
  # geom_col() draws bars whose length is the value itself
  geom_col() +
  labs(
    title = "The 20 countries with the most reported COV19-related deaths",
    x = "Deaths",
    y = "Country/Region"
  )

# Write the chart above (the last plot) to disk.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Deaths_20_countries_march_11.png",
  width = 10,
  height = 10,
  dpi = "screen"
)

Time Series Data

# Download the global confirmed-cases time series (one column per day) and
# give the two slash-separated columns syntactic names.
time_series_confirmed <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv" %>%
  url() %>%
  read_csv() %>%
  rename(
    Country.Region = "Country/Region",
    Province.State = "Province/State"
  )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Preview the first rows of the wide confirmed-cases table.
time_series_confirmed %>% head()
## # A tibble: 6 x 68
##   Province.State Country.Region   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>          <chr>          <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>           Afghanistan     33    65            0         0         0
## 2 <NA>           Albania         41.2  20.2          0         0         0
## 3 <NA>           Algeria         28.0   1.66         0         0         0
## 4 <NA>           Andorra         42.5   1.52         0         0         0
## 5 <NA>           Angola         -11.2  17.9          0         0         0
## 6 <NA>           Antigua and B…  17.1 -61.8          0         0         0
## # … with 61 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## #   `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## #   `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## #   `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## #   `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## #   `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## #   `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## #   `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## #   `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## #   `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## #   `3/24/20` <dbl>, `3/25/20` <dbl>
# Show the column types of the wide confirmed-cases table.
time_series_confirmed %>% str()
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 245 obs. of  68 variables:
##  $ Province.State: chr  NA NA NA NA ...
##  $ Country.Region: chr  "Afghanistan" "Albania" "Algeria" "Andorra" ...
##  $ Lat           : num  33 41.2 28 42.5 -11.2 ...
##  $ Long          : num  65 20.17 1.66 1.52 17.87 ...
##  $ 1/22/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/23/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/24/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/25/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/26/20       : num  0 0 0 0 0 0 0 0 0 3 ...
##  $ 1/27/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 1/28/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 1/29/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 1/30/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 1/31/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/1/20        : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/2/20        : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/3/20        : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/4/20        : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/5/20        : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/6/20        : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/7/20        : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/8/20        : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/9/20        : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/10/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/11/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/12/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/13/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/14/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/15/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/16/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/17/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/18/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/19/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/20/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/21/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/22/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/23/20       : num  0 0 0 0 0 0 0 0 0 4 ...
##  $ 2/24/20       : num  1 0 0 0 0 0 0 0 0 4 ...
##  $ 2/25/20       : num  1 0 1 0 0 0 0 0 0 4 ...
##  $ 2/26/20       : num  1 0 1 0 0 0 0 0 0 4 ...
##  $ 2/27/20       : num  1 0 1 0 0 0 0 0 0 4 ...
##  $ 2/28/20       : num  1 0 1 0 0 0 0 0 0 4 ...
##  $ 2/29/20       : num  1 0 1 0 0 0 0 0 0 4 ...
##  $ 3/1/20        : num  1 0 1 0 0 0 0 1 0 6 ...
##  $ 3/2/20        : num  1 0 3 1 0 0 0 1 0 6 ...
##  $ 3/3/20        : num  1 0 5 1 0 0 1 1 0 13 ...
##  $ 3/4/20        : num  1 0 12 1 0 0 1 1 0 22 ...
##  $ 3/5/20        : num  1 0 12 1 0 0 1 1 0 22 ...
##  $ 3/6/20        : num  1 0 17 1 0 0 2 1 0 26 ...
##  $ 3/7/20        : num  1 0 17 1 0 0 8 1 0 28 ...
##  $ 3/8/20        : num  4 0 19 1 0 0 12 1 0 38 ...
##  $ 3/9/20        : num  4 2 20 1 0 0 12 1 0 48 ...
##  $ 3/10/20       : num  5 10 20 1 0 0 17 1 0 55 ...
##  $ 3/11/20       : num  7 12 20 1 0 0 19 1 0 65 ...
##  $ 3/12/20       : num  7 23 24 1 0 0 19 4 0 65 ...
##  $ 3/13/20       : num  7 33 26 1 0 1 31 8 1 92 ...
##  $ 3/14/20       : num  11 38 37 1 0 1 34 18 1 112 ...
##  $ 3/15/20       : num  16 42 48 1 0 1 45 26 1 134 ...
##  $ 3/16/20       : num  21 51 54 2 0 1 56 52 2 171 ...
##  $ 3/17/20       : num  22 55 60 39 0 1 68 78 2 210 ...
##  $ 3/18/20       : num  22 59 74 39 0 1 79 84 3 267 ...
##  $ 3/19/20       : num  22 64 87 53 0 1 97 115 4 307 ...
##  $ 3/20/20       : num  24 70 90 75 1 1 128 136 6 353 ...
##  $ 3/21/20       : num  24 76 139 88 2 1 158 160 9 436 ...
##  $ 3/22/20       : num  40 89 201 113 2 1 266 194 19 669 ...
##  $ 3/23/20       : num  40 104 230 133 3 3 301 235 32 669 ...
##  $ 3/24/20       : num  74 123 264 164 3 3 387 249 39 818 ...
##  $ 3/25/20       : num  84 146 302 188 3 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Province/State` = col_character(),
##   ..   `Country/Region` = col_character(),
##   ..   Lat = col_double(),
##   ..   Long = col_double(),
##   ..   `1/22/20` = col_double(),
##   ..   `1/23/20` = col_double(),
##   ..   `1/24/20` = col_double(),
##   ..   `1/25/20` = col_double(),
##   ..   `1/26/20` = col_double(),
##   ..   `1/27/20` = col_double(),
##   ..   `1/28/20` = col_double(),
##   ..   `1/29/20` = col_double(),
##   ..   `1/30/20` = col_double(),
##   ..   `1/31/20` = col_double(),
##   ..   `2/1/20` = col_double(),
##   ..   `2/2/20` = col_double(),
##   ..   `2/3/20` = col_double(),
##   ..   `2/4/20` = col_double(),
##   ..   `2/5/20` = col_double(),
##   ..   `2/6/20` = col_double(),
##   ..   `2/7/20` = col_double(),
##   ..   `2/8/20` = col_double(),
##   ..   `2/9/20` = col_double(),
##   ..   `2/10/20` = col_double(),
##   ..   `2/11/20` = col_double(),
##   ..   `2/12/20` = col_double(),
##   ..   `2/13/20` = col_double(),
##   ..   `2/14/20` = col_double(),
##   ..   `2/15/20` = col_double(),
##   ..   `2/16/20` = col_double(),
##   ..   `2/17/20` = col_double(),
##   ..   `2/18/20` = col_double(),
##   ..   `2/19/20` = col_double(),
##   ..   `2/20/20` = col_double(),
##   ..   `2/21/20` = col_double(),
##   ..   `2/22/20` = col_double(),
##   ..   `2/23/20` = col_double(),
##   ..   `2/24/20` = col_double(),
##   ..   `2/25/20` = col_double(),
##   ..   `2/26/20` = col_double(),
##   ..   `2/27/20` = col_double(),
##   ..   `2/28/20` = col_double(),
##   ..   `2/29/20` = col_double(),
##   ..   `3/1/20` = col_double(),
##   ..   `3/2/20` = col_double(),
##   ..   `3/3/20` = col_double(),
##   ..   `3/4/20` = col_double(),
##   ..   `3/5/20` = col_double(),
##   ..   `3/6/20` = col_double(),
##   ..   `3/7/20` = col_double(),
##   ..   `3/8/20` = col_double(),
##   ..   `3/9/20` = col_double(),
##   ..   `3/10/20` = col_double(),
##   ..   `3/11/20` = col_double(),
##   ..   `3/12/20` = col_double(),
##   ..   `3/13/20` = col_double(),
##   ..   `3/14/20` = col_double(),
##   ..   `3/15/20` = col_double(),
##   ..   `3/16/20` = col_double(),
##   ..   `3/17/20` = col_double(),
##   ..   `3/18/20` = col_double(),
##   ..   `3/19/20` = col_double(),
##   ..   `3/20/20` = col_double(),
##   ..   `3/21/20` = col_double(),
##   ..   `3/22/20` = col_double(),
##   ..   `3/23/20` = col_double(),
##   ..   `3/24/20` = col_double(),
##   ..   `3/25/20` = col_double()
##   .. )
# Download the global deaths time series (one column per day) and give the
# two slash-separated columns syntactic names.
time_series_deaths <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv" %>%
  url() %>%
  read_csv() %>%
  rename(
    Country.Region = "Country/Region",
    Province.State = "Province/State"
  )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Preview the first rows of the wide deaths table.
time_series_deaths %>% head()
## # A tibble: 6 x 68
##   Province.State Country.Region   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>          <chr>          <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>           Afghanistan     33    65            0         0         0
## 2 <NA>           Albania         41.2  20.2          0         0         0
## 3 <NA>           Algeria         28.0   1.66         0         0         0
## 4 <NA>           Andorra         42.5   1.52         0         0         0
## 5 <NA>           Angola         -11.2  17.9          0         0         0
## 6 <NA>           Antigua and B…  17.1 -61.8          0         0         0
## # … with 61 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## #   `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## #   `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## #   `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## #   `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## #   `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## #   `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## #   `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## #   `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## #   `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## #   `3/24/20` <dbl>, `3/25/20` <dbl>
# Show the column types of the wide deaths table.
time_series_deaths %>% str()
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 245 obs. of  68 variables:
##  $ Province.State: chr  NA NA NA NA ...
##  $ Country.Region: chr  "Afghanistan" "Albania" "Algeria" "Andorra" ...
##  $ Lat           : num  33 41.2 28 42.5 -11.2 ...
##  $ Long          : num  65 20.17 1.66 1.52 17.87 ...
##  $ 1/22/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/23/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/24/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/25/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/26/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/27/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/28/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/29/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/30/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 1/31/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/1/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/2/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/3/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/4/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/5/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/6/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/7/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/8/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/9/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/10/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/11/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/12/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/13/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/14/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/15/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/16/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/17/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/18/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/19/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/20/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/21/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/22/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/23/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/24/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/25/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/26/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/27/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/28/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 2/29/20       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 3/1/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 3/2/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 3/3/20        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ 3/4/20        : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ 3/5/20        : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ 3/6/20        : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ 3/7/20        : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ 3/8/20        : num  0 0 0 0 0 0 1 0 0 2 ...
##  $ 3/9/20        : num  0 0 0 0 0 0 1 0 0 2 ...
##  $ 3/10/20       : num  0 0 0 0 0 0 1 0 0 2 ...
##  $ 3/11/20       : num  0 1 0 0 0 0 1 0 0 2 ...
##  $ 3/12/20       : num  0 1 1 0 0 0 1 0 0 2 ...
##  $ 3/13/20       : num  0 1 2 0 0 0 2 0 0 2 ...
##  $ 3/14/20       : num  0 1 3 0 0 0 2 0 0 2 ...
##  $ 3/15/20       : num  0 1 4 0 0 0 2 0 0 2 ...
##  $ 3/16/20       : num  0 1 4 0 0 0 2 0 0 2 ...
##  $ 3/17/20       : num  0 1 4 0 0 0 2 0 0 4 ...
##  $ 3/18/20       : num  0 2 7 0 0 0 2 0 0 5 ...
##  $ 3/19/20       : num  0 2 9 0 0 0 3 0 0 5 ...
##  $ 3/20/20       : num  0 2 11 0 0 0 3 0 0 6 ...
##  $ 3/21/20       : num  0 2 15 0 0 0 4 0 0 6 ...
##  $ 3/22/20       : num  1 2 17 1 0 0 4 0 0 6 ...
##  $ 3/23/20       : num  1 4 17 1 0 0 4 0 0 6 ...
##  $ 3/24/20       : num  1 5 19 1 0 0 6 0 0 7 ...
##  $ 3/25/20       : num  2 5 21 1 0 0 8 0 0 7 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Province/State` = col_character(),
##   ..   `Country/Region` = col_character(),
##   ..   Lat = col_double(),
##   ..   Long = col_double(),
##   ..   `1/22/20` = col_double(),
##   ..   `1/23/20` = col_double(),
##   ..   `1/24/20` = col_double(),
##   ..   `1/25/20` = col_double(),
##   ..   `1/26/20` = col_double(),
##   ..   `1/27/20` = col_double(),
##   ..   `1/28/20` = col_double(),
##   ..   `1/29/20` = col_double(),
##   ..   `1/30/20` = col_double(),
##   ..   `1/31/20` = col_double(),
##   ..   `2/1/20` = col_double(),
##   ..   `2/2/20` = col_double(),
##   ..   `2/3/20` = col_double(),
##   ..   `2/4/20` = col_double(),
##   ..   `2/5/20` = col_double(),
##   ..   `2/6/20` = col_double(),
##   ..   `2/7/20` = col_double(),
##   ..   `2/8/20` = col_double(),
##   ..   `2/9/20` = col_double(),
##   ..   `2/10/20` = col_double(),
##   ..   `2/11/20` = col_double(),
##   ..   `2/12/20` = col_double(),
##   ..   `2/13/20` = col_double(),
##   ..   `2/14/20` = col_double(),
##   ..   `2/15/20` = col_double(),
##   ..   `2/16/20` = col_double(),
##   ..   `2/17/20` = col_double(),
##   ..   `2/18/20` = col_double(),
##   ..   `2/19/20` = col_double(),
##   ..   `2/20/20` = col_double(),
##   ..   `2/21/20` = col_double(),
##   ..   `2/22/20` = col_double(),
##   ..   `2/23/20` = col_double(),
##   ..   `2/24/20` = col_double(),
##   ..   `2/25/20` = col_double(),
##   ..   `2/26/20` = col_double(),
##   ..   `2/27/20` = col_double(),
##   ..   `2/28/20` = col_double(),
##   ..   `2/29/20` = col_double(),
##   ..   `3/1/20` = col_double(),
##   ..   `3/2/20` = col_double(),
##   ..   `3/3/20` = col_double(),
##   ..   `3/4/20` = col_double(),
##   ..   `3/5/20` = col_double(),
##   ..   `3/6/20` = col_double(),
##   ..   `3/7/20` = col_double(),
##   ..   `3/8/20` = col_double(),
##   ..   `3/9/20` = col_double(),
##   ..   `3/10/20` = col_double(),
##   ..   `3/11/20` = col_double(),
##   ..   `3/12/20` = col_double(),
##   ..   `3/13/20` = col_double(),
##   ..   `3/14/20` = col_double(),
##   ..   `3/15/20` = col_double(),
##   ..   `3/16/20` = col_double(),
##   ..   `3/17/20` = col_double(),
##   ..   `3/18/20` = col_double(),
##   ..   `3/19/20` = col_double(),
##   ..   `3/20/20` = col_double(),
##   ..   `3/21/20` = col_double(),
##   ..   `3/22/20` = col_double(),
##   ..   `3/23/20` = col_double(),
##   ..   `3/24/20` = col_double(),
##   ..   `3/25/20` = col_double()
##   .. )
# Reshape the wide confirmed-cases table (one column per day) into one row per
# country/region and day, summing over provinces/states.
#
# The day labels (e.g. "1/22/20") are parsed into Date values *before* the
# aggregation, so that grouping works on real dates rather than on character
# strings and the rows of each country come out in chronological order.
# As before, the result is still grouped by Country.Region after summarise().
time_series_confirmed_long <- time_series_confirmed %>%
  pivot_longer(
    -c(Province.State, Country.Region, Lat, Long),
    names_to = "Date",
    values_to = "Confirmed"
  ) %>%
  # convert the month/day/year labels to date format
  mutate(Date = mdy(Date)) %>%
  group_by(Country.Region, Date) %>%
  summarise(Confirmed = sum(Confirmed))
# Preview the first rows of the long per-country table.
time_series_confirmed_long %>% head()
## # A tibble: 6 x 3
## # Groups:   Country.Region [1]
##   Country.Region Date       Confirmed
##   <chr>          <date>         <dbl>
## 1 Afghanistan    2020-01-22         0
## 2 Afghanistan    2020-01-23         0
## 3 Afghanistan    2020-01-24         0
## 4 Afghanistan    2020-01-25         0
## 5 Afghanistan    2020-01-26         0
## 6 Afghanistan    2020-01-27         0

US confirmed COVID-19 Cases

# Confirmed cases over time for the US, drawn as points joined by a line.
time_series_confirmed_long %>%
  filter(Country.Region == "US") %>%
  ggplot(mapping = aes(x = Date, y = Confirmed)) +
  geom_point() +
  geom_line() +
  labs(title = "US Confirmed COVID-19 Cases")

# Write the chart above (the last plot) to disk.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/US_all_march_11.png",
  width = 10,
  height = 10,
  dpi = "screen"
)

Colombia confirmed COVID-19 Cases

# Confirmed cases over time for Colombia, drawn as points joined by a line.
time_series_confirmed_long %>%
  filter(Country.Region == "Colombia") %>%
  ggplot(mapping = aes(x = Date, y = Confirmed)) +
  geom_point() +
  geom_line() +
  labs(title = "Colombia Confirmed COVID-19 Cases")

# Write the chart above (the last plot) to disk.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_colombia_march_11.png",
  width = 10,
  height = 10,
  dpi = "screen"
)

China confirmed COVID-19 Cases

# Confirmed cases over time for China, drawn as points joined by a line.
time_series_confirmed_long %>%
  filter(Country.Region == "China") %>%
  ggplot(mapping = aes(x = Date, y = Confirmed)) +
  geom_point() +
  geom_line() +
  labs(title = "China Confirmed COVID-19 Cases")

# Write the chart above (the last plot) to disk.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_china_march_11.png",
  width = 10,
  height = 10,
  dpi = "screen"
)
# Confirmed cases over time for six countries, one panel per country in a
# two-column grid. Each panel gets its own y scale ("free_y").
time_series_confirmed_long %>%
  filter(
    Country.Region %in% c(
      "China", "Japan", "Korea, South", "Italy", "Spain", "US"
    )
  ) %>%
  ggplot(mapping = aes(x = Date, y = Confirmed)) +
  geom_point() +
  geom_line() +
  labs(title = "Confirmed COVID-19 Cases") +
  facet_wrap(vars(Country.Region), ncol = 2, scales = "free_y")

# Write the chart above (the last plot) to disk.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_comparison_march_11.png",
  width = 10,
  height = 10,
  dpi = "screen"
)

Latin America COVID-19 Cases

# Confirmed cases over time for eight Latin American countries, one panel per
# country in a two-column grid. Each panel gets its own y scale ("free_y").
time_series_confirmed_long %>%
  filter(
    Country.Region %in% c(
      "Colombia", "Chile", "Argentina", "Brazil",
      "Ecuador", "Peru", "Mexico", "Venezuela"
    )
  ) %>%
  ggplot(mapping = aes(x = Date, y = Confirmed)) +
  geom_point() +
  geom_line() +
  labs(title = "Confirmed COVID-19 Cases") +
  facet_wrap(vars(Country.Region), ncol = 2, scales = "free_y")

# Write the chart above (the last plot) to disk.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_LatinAmerica_comparison_march_11.png",
  width = 10,
  height = 10,
  dpi = "screen"
)

Same graph

# Eight-colour palette used for the per-country line colours below.
cbPalette <- c(
  "#999999", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

# Confirmed cases over time for five countries in a single panel, one colour
# per country taken from cbPalette.
time_series_confirmed_long %>%
  filter(
    Country.Region %in% c("China", "France", "Italy", "Korea, South", "US")
  ) %>%
  ggplot(mapping = aes(x = Date, y = Confirmed, color = Country.Region)) +
  geom_point() +
  geom_line() +
  scale_color_manual(values = cbPalette) +
  labs(title = "Confirmed COVID-19 Cases")

# Write the chart above (the last plot) to disk.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_all_march_11.png",
  width = 10,
  height = 10,
  dpi = "screen"
)
# Confirmed cases over time for eight Latin American countries in a single
# panel, one colour per country taken from cbPalette.
time_series_confirmed_long %>%
  filter(
    Country.Region %in% c(
      "Colombia", "Chile", "Argentina", "Brazil",
      "Ecuador", "Peru", "Mexico", "Venezuela"
    )
  ) %>%
  ggplot(mapping = aes(x = Date, y = Confirmed, color = Country.Region)) +
  geom_point() +
  geom_line() +
  scale_color_manual(values = cbPalette) +
  labs(title = "Confirmed COVID-19 Cases")

# Write the chart above (the last plot) to disk.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_all2_march_11.png",
  width = 10,
  height = 10,
  dpi = "screen"
)
# Dot plot of confirmed cases per US state in the 24 March report. The rows of
# this report are summed per Province.State first, then the states are ordered
# by their total.
report_03_24_2020 %>%
  filter(Country.Region == "US") %>%
  group_by(Province.State) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  ggplot(aes(x = Confirmed, y = reorder(Province.State, Confirmed))) +
  geom_point() +
  ggtitle("Confirmed cases for each US State") +
  # The y axis shows Province.State, so it is labelled as such
  # (it previously read "Country/Region").
  ylab("Province/State") +
  xlab("Confirmed Cases March 24")

# ggsave() with no `plot` argument writes the last plot.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_US_march_24.png",
  width = 10,
  height = 10,
  dpi = "screen"
)
# Horizontal bar chart of the 20 countries/regions with the highest death
# totals in the 24 March report.
report_03_24_2020 %>%
  group_by(Country.Region) %>%
  summarise(Deaths = sum(Deaths)) %>%
  arrange(desc(Deaths)) %>%
  slice(seq_len(20)) %>%
  ggplot(aes(x = reorder(Country.Region, Deaths), y = Deaths)) +
  geom_bar(stat = "identity") +
  ggtitle("The 20 Countries with the most reported COV19-related deaths March 24") +
  # Axis titles belong to the aesthetics, not to the screen direction:
  # x is the country and y is the death count, even though coord_flip()
  # draws x vertically. The two labels were previously swapped.
  xlab("Country/Region") +
  ylab("Deaths") +
  coord_flip()

# ggsave() with no `plot` argument writes the last plot.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/deaths_world_march_24.png",
  width = 10,
  height = 10,
  dpi = "screen"
)
# Horizontal bar chart of the 20 countries/regions with the highest death
# totals in the 24 March report (displayed only, not saved here).
report_03_24_2020 %>%
  group_by(Country.Region) %>%
  summarise(Deaths = sum(Deaths)) %>%
  arrange(desc(Deaths)) %>%
  slice(seq_len(20)) %>%
  ggplot(aes(x = reorder(Country.Region, Deaths), y = Deaths)) +
  geom_bar(stat = "identity") +
  ggtitle("The 20 Countries with the most reported COV19-related deaths") +
  # Axis titles belong to the aesthetics, not to the screen direction:
  # x is the country and y is the death count, even though coord_flip()
  # draws x vertically. The two labels were previously swapped.
  xlab("Country/Region") +
  ylab("Deaths") +
  coord_flip()

library(maps)
library(viridis)
# Polygon outlines used as the base layer of the world maps below.
world <- map_data(map = "world")
# Legend break points shared by the size and colour scales of the maps.
mybreaks <- c(1, 20, 100, 1e3, 5e4)

Confirmed cases 2/25/20

# World map of confirmed cases on 2/25/20: one point per row of
# time_series_confirmed, with both size and colour mapped to that day's count
# on a log scale. Rows whose count is 0 become infinite under the log
# transform and are dropped by ggplot2 with a warning.
ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey", alpha = 0.3
  ) +
  geom_point(
    data = time_series_confirmed,
    aes(x = Long, y = Lat, size = `2/25/20`, color = `2/25/20`),
    # stroke is a numeric width; use 0 rather than the reassignable `F`
    stroke = 0, alpha = 0.7
  ) +
  scale_size_continuous(
    name = "Cases", trans = "log", range = c(1, 7), breaks = mybreaks,
    labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")
  ) +
  # scale_alpha_continuous(name="Cases", trans="log", range=c(0.1, 0.9),breaks=mybreaks) +
  scale_color_viridis_c(
    option = "inferno", name = "Cases", trans = "log", breaks = mybreaks,
    labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")
  ) +
  theme_void() +
  # Show colour with a legend guide (same guide type as the size scale)
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  ggtitle("Confirmed COVID-19 Cases Feb 25/20")
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 169 rows containing missing values (geom_point).

# Write the last plot to disk at screen resolution.
# NOTE(review): the file name starts with "onfirmed" - probably a typo for
# "confirmed"; left unchanged so anything referring to this image still works.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/onfirmed_world_feb_25.png",
  dpi = "screen"
)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 169 rows containing missing values (geom_point).

Covid-19 deaths 2/25/20

# World map of deaths on 2/25/20: one point per row of time_series_deaths,
# with both size and colour mapped to that day's count on a log scale.
# Rows whose count is 0 become infinite under the log transform and are
# dropped by ggplot2 with a warning.
ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey", alpha = 0.3
  ) +
  geom_point(
    data = time_series_deaths,
    aes(x = Long, y = Lat, size = `2/25/20`, color = `2/25/20`),
    # stroke is a numeric width; use 0 rather than the reassignable `F`
    stroke = 0, alpha = 0.7
  ) +
  # The plotted values are deaths, so both legends are titled "Deaths"
  # (they previously read "Cases"). The two scales keep identical names.
  scale_size_continuous(
    name = "Deaths", trans = "log", range = c(1, 7), breaks = mybreaks,
    labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")
  ) +
  # scale_alpha_continuous(name="Cases", trans="log", range=c(0.1, 0.9),breaks=mybreaks) +
  scale_color_viridis_c(
    option = "inferno", name = "Deaths", trans = "log", breaks = mybreaks,
    labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")
  ) +
  theme_void() +
  # Show colour with a legend guide (same guide type as the size scale)
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  ggtitle("Confirmed COVID-19 deaths Feb 25/20")
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 211 rows containing missing values (geom_point).

# Save the most recently displayed plot as a PNG using the "screen" dpi preset.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/deaths_world_feb_25.png",
  plot = last_plot(),
  dpi = "screen"
)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 211 rows containing missing values (geom_point).

Confirmed cases 3/24/20

# World bubble map of the confirmed cases recorded in the `3/24/20` column of
# time_series_confirmed. Point size and colour both encode the count on a log
# scale and share one legend.
# Rows with a zero count are filtered out first: log(0) is -Inf, so those rows
# cannot be drawn and only produce "infinite values" / "missing values"
# warnings. `world` and `mybreaks` are defined outside this chunk.
ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey", alpha = 0.3
  ) +
  geom_point(
    data = filter(time_series_confirmed, `3/24/20` > 0),
    aes(x = Long, y = Lat, size = `3/24/20`, color = `3/24/20`),
    stroke = FALSE, alpha = 0.7
  ) +
  scale_size_continuous(
    name = "Cases", trans = "log", range = c(1, 7), breaks = mybreaks,
    labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")
  ) +
  # scale_alpha_continuous(name="Cases", trans="log", range=c(0.1, 0.9),breaks=mybreaks) +
  scale_color_viridis_c(
    option = "inferno", name = "Cases", trans = "log", breaks = mybreaks,
    labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")
  ) +
  theme_void() +
  # Draw the colour scale as a legend so it can merge with the size legend.
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  ggtitle("Confirmed COVID-19 Cases March 24/20")
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 5 rows containing missing values (geom_point).

# Save the most recently displayed plot as a PNG using the "screen" dpi preset.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/confirmed_world_march_24.png",
  plot = last_plot(),
  dpi = "screen"
)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 5 rows containing missing values (geom_point).

COVID-19 deaths 3/24/20

# World bubble map of the deaths recorded in the `3/24/20` column of
# time_series_deaths. Point size and colour both encode the count on a log
# scale and share one legend.
# Rows with a zero count are filtered out first: log(0) is -Inf, so those rows
# cannot be drawn and only produce "infinite values" / "missing values"
# warnings. `world` and `mybreaks` are defined outside this chunk.
ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey", alpha = 0.3
  ) +
  geom_point(
    data = filter(time_series_deaths, `3/24/20` > 0),
    aes(x = Long, y = Lat, size = `3/24/20`, color = `3/24/20`),
    stroke = FALSE, alpha = 0.7
  ) +
  scale_size_continuous(
    name = "Deaths", trans = "log", range = c(1, 7), breaks = mybreaks,
    labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")
  ) +
  # scale_alpha_continuous(name="Deaths", trans="log", range=c(0.1, 0.9),breaks=mybreaks) +
  scale_color_viridis_c(
    option = "inferno", name = "Deaths", trans = "log", breaks = mybreaks,
    labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")
  ) +
  theme_void() +
  # Draw the colour scale as a legend so it can merge with the size legend.
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  ggtitle("Confirmed COVID-19 deaths March 24/20")
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 118 rows containing missing values (geom_point).

# Save the most recently displayed plot as a PNG using the "screen" dpi preset.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/deaths_world_march_24.png",
  plot = last_plot(),
  dpi = "screen"
)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 118 rows containing missing values (geom_point).
# Download the global confirmed-cases time series and reshape it to long
# format: one row per coordinate pair (Place) and Date.
# Columns produced: Place, Date, cumulative_cases (NA where the total is not
# positive), Lat, Long, Pandemic_day (days since the earliest Date).
# The result is explicitly ungrouped so later verbs are not silently applied
# per Place.
time_series_confirmed_long2 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>%
  rename(Province.State = "Province/State", Country.Region = "Country/Region") %>%
  pivot_longer(
    -c(Province.State, Country.Region, Lat, Long),
    names_to = "Date", values_to = "cumulative_cases"
  ) %>%
  # NOTE(review): the parsed date is shifted back by one day, as in the
  # original pipeline -- confirm this offset is intended.
  mutate(
    Date = mdy(Date) - days(1),
    Place = paste(Lat, Long, sep = "_")
  ) %>%
  group_by(Place, Date) %>%
  summarise(
    cumulative_cases = sum(cumulative_cases),
    Lat = mean(Lat),
    Long = mean(Long)
  ) %>%
  ungroup() %>%
  mutate(
    # Totals that are not positive become NA (vectorised, sum computed once).
    cumulative_cases = if_else(cumulative_cases > 0, cumulative_cases, NA_real_),
    Pandemic_day = as.numeric(Date - min(Date))
  )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Preview the first six rows of the long-format table.
head(time_series_confirmed_long2, n = 6)
## # A tibble: 6 x 6
## # Groups:   Place [1]
##   Place           Date       cumulative_cases     Lat  Long Pandemic_day
##   <chr>           <date>                <dbl>   <dbl> <dbl>        <dbl>
## 1 -0.0236_37.9062 2020-01-21               NA -0.0236  37.9            0
## 2 -0.0236_37.9062 2020-01-22               NA -0.0236  37.9            1
## 3 -0.0236_37.9062 2020-01-23               NA -0.0236  37.9            2
## 4 -0.0236_37.9062 2020-01-24               NA -0.0236  37.9            3
## 5 -0.0236_37.9062 2020-01-25               NA -0.0236  37.9            4
## 6 -0.0236_37.9062 2020-01-26               NA -0.0236  37.9            5
 # Small-multiple world maps of cumulative confirmed cases, one panel for every
 # 7th date in the series. Point size is cumulative cases in thousands.
 ggplot(
   subset(time_series_confirmed_long2, Date %in% seq(min(Date), max(Date), by = 7)),
   aes(x = Long, y = Lat, size = cumulative_cases / 1000)
 ) +
   borders("world", colour = NA, fill = "grey90") +
   theme_bw() +
   geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
   # The legend title previously carried a stray closing parenthesis.
   labs(
     title = "COVID-19 spread", x = "", y = "",
     size = "Cases (x1000)"
   ) +
   theme(legend.position = "right") +
   coord_fixed(ratio = 1) +
   facet_wrap(. ~ Date, nrow = 3)
## Warning: Removed 1316 rows containing missing values (geom_point).

# Save the most recently displayed plot as a PNG using the "screen" dpi preset.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/static.png",
  plot = last_plot(),
  dpi = "screen"
)
## Saving 7 x 5 in image
## Warning: Removed 1316 rows containing missing values (geom_point).

Latin America

# Latin countries: the single list used for the map outline and both filters.
latin_countries <- c(
  "Colombia", "Brazil", "Peru", "Ecuador", "Chile",
  "Venezuela", "Bolivia", "Argentina", "Paraguay", "Uruguay"
)
# Retrieve map data (polygon outlines) for those countries.
# `some.latin` now only ever holds the map data; it no longer starts out as
# the vector of country names.
some.latin <- map_data("world", region = latin_countries)
# Label coordinates of countries: mean longitude/latitude of each country's
# outline vertices.
region.coord.data <- some.latin %>%
  group_by(region) %>%
  summarise(long = mean(long), lat = mean(lat))
# Confirmed cases of COVID-19 in Latin America
time_series_confirmed_latin <- time_series_confirmed %>%
  filter(Country.Region %in% latin_countries)
# Confirmed deaths of COVID-19 in Latin America
time_series_deaths_latin <- time_series_deaths %>%
  filter(Country.Region %in% latin_countries)

COVID-19 Confirmed Cases in South America reported on March 24/20

# Bubble map of the confirmed cases recorded in the `3/24/20` column for the
# selected South American countries, with country-name labels.
# Legend breaks and their labels; each label names the bin that starts at the
# matching break. The bin starting at 300 runs up to the next break (1000), so
# it is labelled "300-999" (it was "300-399").
mybreaks2 <- c(1, 50, 100, 200, 300, 1000, 2000)
mylabels2 <- c(
  "1-49", "50-99", "100-199", "200-299", "300-999", "1000-1999", "2000+"
)
ggplot() +
  geom_polygon(
    data = some.latin,
    aes(x = long, y = lat, group = group),
    fill = "grey", alpha = 0.3, colour = "white"
  ) +
  geom_point(
    data = time_series_confirmed_latin,
    aes(x = Long, y = Lat, size = `3/24/20`, color = `3/24/20`),
    stroke = FALSE, alpha = 0.7
  ) +
  scale_size_continuous(
    name = "Cases", trans = "log", range = c(1, 20),
    breaks = mybreaks2, labels = mylabels2
  ) +
  # scale_alpha_continuous(name="Cases", trans="log", range=c(0.1, 0.9),breaks=mybreaks) +
  scale_color_viridis_c(
    option = "inferno", name = "Cases", trans = "log",
    breaks = mybreaks2, labels = mylabels2
  ) +
  theme_void() +
  # Draw the colour scale as a legend so it can merge with the size legend.
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 30),
    legend.title = element_text(size = 30),
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  geom_text(
    aes(x = long, y = lat, label = region),
    data = region.coord.data, size = 5, hjust = 0.5
  ) +
  ggtitle("COVID-19 Confirmed Cases in South America reported on March 24/20")

# Save the most recently displayed plot as a 14 x 12 PNG using the "screen"
# dpi preset.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/COVID-19-Confirmed-Cases-South-America-March-24.png",
  plot = last_plot(),
  width = 14,
  height = 12,
  dpi = "screen"
)

COVID-19 Confirmed Deaths in South America reported on March 24/20

# Bubble map of the deaths recorded in the `3/24/20` column for the selected
# South American countries, with country-name labels.
# Legend breaks and their labels. Each break now falls inside the range its
# label names (5 -> "0-9", 15 -> "10-19", 25 -> "20-29", 35 -> "30-39",
# 45 -> "40+"); previously 35 was labelled '40-49' and 45 was labelled '50+'.
mybreaks3 <- c(5, 15, 25, 35, 45)
mylabels3 <- c("0-9", "10-19", "20-29", "30-39", "40+")
ggplot() +
  geom_polygon(
    data = some.latin,
    aes(x = long, y = lat, group = group),
    fill = "grey", alpha = 0.3, colour = "white"
  ) +
  # Rows with a zero count are filtered out: log(0) is -Inf, so they cannot be
  # drawn and only produce "infinite values" / "missing values" warnings.
  geom_point(
    data = filter(time_series_deaths_latin, `3/24/20` > 0),
    aes(x = Long, y = Lat, size = `3/24/20`, color = `3/24/20`),
    stroke = FALSE, alpha = 0.7
  ) +
  scale_size_continuous(
    name = "Deaths", trans = "log", range = c(1, 20),
    breaks = mybreaks3, labels = mylabels3
  ) +
  # scale_alpha_continuous(name="Deaths", trans="log", range=c(0.1, 0.9),breaks=mybreaks) +
  scale_color_viridis_c(
    option = "inferno", name = "Deaths", trans = "log",
    breaks = mybreaks3, labels = mylabels3
  ) +
  theme_void() +
  # Draw the colour scale as a legend so it can merge with the size legend.
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 30),
    legend.title = element_text(size = 30),
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  geom_text(
    aes(x = long, y = lat, label = region),
    data = region.coord.data, size = 5, hjust = 0.5
  ) +
  ggtitle("COVID-19 Confirmed Deaths in South America reported on March 24/20")
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 3 rows containing missing values (geom_point).

# Save the most recently displayed plot as a 14 x 12 PNG using the "screen"
# dpi preset.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/COVID-19-Confirmed-deaths-South-America-March-24.png",
  plot = last_plot(),
  width = 14,
  height = 12,
  dpi = "screen"
)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 3 rows containing missing values (geom_point).

Latin America

# Latin cases
# Download the global confirmed-cases time series, keep the selected South
# American countries and reshape to long format: one row per coordinate pair
# (Place) and Date. Only rows with a positive cumulative total are kept.
# Columns produced: Place, Date, cumulative_cases, Lat, Long, Pandemic_day
# (days since the earliest Date, computed before the non-positive rows are
# dropped). The result is explicitly ungrouped.
time_series_confirmed_long2_latin <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>%
  rename(Province.State = "Province/State", Country.Region = "Country/Region") %>%
  filter(Country.Region %in% c(
    "Colombia", "Brazil", "Peru", "Ecuador", "Chile",
    "Venezuela", "Bolivia", "Argentina", "Paraguay", "Uruguay"
  )) %>%
  pivot_longer(
    -c(Province.State, Country.Region, Lat, Long),
    names_to = "Date", values_to = "cumulative_cases"
  ) %>%
  # NOTE(review): the parsed date is shifted back by one day, as in the
  # original pipeline -- confirm this offset is intended.
  mutate(
    Date = mdy(Date) - days(1),
    Place = paste(Lat, Long, sep = "_")
  ) %>%
  group_by(Place, Date) %>%
  summarise(
    cumulative_cases = sum(cumulative_cases),
    Lat = mean(Lat),
    Long = mean(Long)
  ) %>%
  ungroup() %>%
  mutate(Pandemic_day = as.numeric(Date - min(Date))) %>%
  # Equivalent to setting non-positive totals to NA and then dropping NA rows.
  filter(cumulative_cases > 0)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Small-multiple maps of cumulative confirmed cases in the selected South
# American countries, one panel for every 4th date. Point size is cumulative
# cases in hundreds; country names are drawn in every panel.
ggplot(
  subset(time_series_confirmed_long2_latin, Date %in% seq(min(Date), max(Date), by = 4)),
  aes(x = Long, y = Lat, size = cumulative_cases / 100)
) +
  borders(
    "world", colour = NA, fill = "grey90",
    regions = c(
      "Colombia", "Brazil", "Peru", "Ecuador", "Chile",
      "Venezuela", "Bolivia", "Argentina", "Paraguay", "Uruguay"
    )
  ) +
  theme_bw() +
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  # Size is cases / 100, so the legend reads "x100" (it said "x1000" and had a
  # stray closing parenthesis).
  labs(
    title = "COVID-19 spread", x = "", y = "",
    size = "Cases (x100)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1) +
  facet_wrap(. ~ Date, nrow = 3) +
  geom_text(
    aes(x = long, y = lat, label = region),
    data = region.coord.data, size = 2, hjust = 0.5
  )

# Keep only the rows of the world map data whose region is Colombia.
map_colombia <- filter(world, region == "Colombia")

COVID-19 spread Colombia

# Colombia cases
# Download the global confirmed-cases time series, keep Colombia and reshape
# to long format: one row per coordinate pair (Place) and Date. Only rows with
# a positive cumulative total are kept.
# Columns produced: Place, Date, cumulative_cases, Lat, Long, Pandemic_day
# (days since the earliest Date, computed before the non-positive rows are
# dropped). The result is explicitly ungrouped.
time_series_confirmed_long2_Colombia <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>%
  rename(Province.State = "Province/State", Country.Region = "Country/Region") %>%
  filter(Country.Region == "Colombia") %>%
  pivot_longer(
    -c(Province.State, Country.Region, Lat, Long),
    names_to = "Date", values_to = "cumulative_cases"
  ) %>%
  # NOTE(review): the parsed date is shifted back by one day, as in the
  # original pipeline -- confirm this offset is intended.
  mutate(
    Date = mdy(Date) - days(1),
    Place = paste(Lat, Long, sep = "_")
  ) %>%
  group_by(Place, Date) %>%
  summarise(
    cumulative_cases = sum(cumulative_cases),
    Lat = mean(Lat),
    Long = mean(Long)
  ) %>%
  ungroup() %>%
  mutate(Pandemic_day = as.numeric(Date - min(Date))) %>%
  # Equivalent to setting non-positive totals to NA and then dropping NA rows.
  filter(cumulative_cases > 0)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Small-multiple maps of cumulative confirmed cases in Colombia, one panel for
# every 3rd date. Point size is cumulative cases in hundreds.
ggplot(
  subset(time_series_confirmed_long2_Colombia, Date %in% seq(min(Date), max(Date), by = 3)),
  aes(x = Long, y = Lat, size = cumulative_cases / 100)
) +
  borders("world", colour = NA, fill = "grey90", regions = "Colombia") +
  theme_bw() +
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  # Size is cases / 100, so the legend reads "x100" (it said "x1000" and had a
  # stray closing parenthesis).
  labs(
    title = "COVID-19 spread", x = "", y = "",
    size = "Cases (x100)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1) +
  facet_wrap(. ~ Date, nrow = 3)

```